import csv


def parse_block(block):
    """Extract the title, keywords and abstract from one tagged record.

    Each field of the record starts on a line of the form ``XX value``,
    where ``XX`` is a two-character tag at column 0.  Only the tags ``T1``
    (title), ``K1`` (keywords) and ``AB`` (abstract) are kept.  Lines that do
    not start with a tag are continuation lines: they are appended, separated
    by a single space, to the field that precedes them, and are dropped when
    that field is not one of the three wanted ones.  If a wanted tag occurs
    more than once in the record, the occurrences are joined with ``"; "``.

    Args:
        block: Text of a single record; blank lines inside it are ignored.

    Returns:
        A dict with the keys ``"标题"``, ``"关键字"`` and ``"摘要"``.  A field
        that is missing from the record maps to the empty string.
    """
    wanted = {'T1': "标题", 'K1': "关键字", 'AB': "摘要"}

    # For every output column: one list of text fragments per tag occurrence.
    collected = {column: [] for column in wanted.values()}
    # Fragment list of the wanted field currently being read, or None while
    # inside a field we do not keep (so its continuation lines are dropped).
    current = None

    for line in block.splitlines():
        if not line.strip():
            continue

        if _is_tag_line(line):
            column = wanted.get(line[:2])
            if column is None:
                current = None
            else:
                current = [line[3:].strip()]
                collected[column].append(current)
        elif current is not None:
            current.append(line.strip())

    data = {}
    for column, occurrences in collected.items():
        # Skip empty fragments so an empty tag line does not leave stray
        # separators in the joined value.
        texts = [' '.join(piece for piece in pieces if piece) for pieces in occurrences]
        data[column] = '; '.join(text for text in texts if text)
    return data


def _is_tag_line(line):
    """Return True if ``line`` starts with a two-character field tag and a space.

    A tag is an ASCII uppercase letter followed by an ASCII uppercase letter
    or a digit (for example ``AB``, ``T1`` or ``K1``).  The digit case matters:
    testing both characters with ``str.isupper`` rejects ``T1`` and ``K1``.
    """
    if len(line) < 3 or line[2] != ' ':
        return False
    first, second = line[0], line[1]
    return 'A' <= first <= 'Z' and ('A' <= second <= 'Z' or '0' <= second <= '9')


# Read the whole export in one go; records are separated by an empty line.
with open('C:\\Users\\lihaoyang6\\Desktop\\refworks.txt', 'r', encoding='gbk') as src:
    content = src.read()
blocks = content.strip().split('\n\n')

# Parse every record, ignoring chunks that contain only whitespace.
results = [parse_block(block) for block in blocks if block.strip()]

# Write the parsed records as CSV, one column per extracted field.
# newline='' lets the csv module control the line endings itself.
with open('C:\\Users\\lihaoyang6\\Desktop\\output.csv', 'w', encoding='utf-8', newline='') as dst:
    writer = csv.DictWriter(dst, fieldnames=["标题", "关键字", "摘要"])
    writer.writeheader()
    for row in results:
        writer.writerow(row)

print("转换完成！已生成 output.csv 文件")